#include "gumbowrap.h"
#include <stdio.h>
#include "ilog.h"
#include "simplebuf.h"
/* When non-zero, WalkHtmlPath prints a trace of the path step and of every
 * child it visits to stdout. */
int htmldebug = 0;

/* Fallback name returned by TagEnumToName/TypeEnumToName for values outside
 * the accepted range. */
const char *invalid = "INVALID";
/* Sentinel path entry: the path walkers memcmp() the next path entry against
 * this value to detect the end of a path array. */
const HtmlPathNode ENDNODE = __HTMLPATHNODEEND__;

/*
 * Return the tag stored in the element payload of `n`.
 *
 * Returns the tag value (>= 0) on success, or GUMBO_ARGNULL (< 0) when `n`
 * is NULL. The node type is not inspected: the element member of the union
 * is read unconditionally.
 */
int GetTag(GumboNode *n)
{
	return (n != NULL) ? (int)n->v.element.tag : GUMBO_ARGNULL;
}

/*
 * Return the number of children recorded in the element payload of `n`.
 *
 * Returns the child count (>= 0) on success, or GUMBO_ARGNULL (< 0) when `n`
 * is NULL. The node type is not inspected.
 */
int GetLen(GumboNode *n)
{
	if (n != NULL) {
		return (int)n->v.element.children.length;
	}
	return GUMBO_ARGNULL;
}
/*
 * Fetch the child at position `index` of node `n`.
 *
 * n      node whose element child vector is indexed.
 * index  zero-based position of the wanted child.
 * child  out: receives the child pointer on success; untouched on failure.
 *
 * Returns 0 on success, GUMBO_ARGNULL when `n` or `child` is NULL, and
 * GUMBO_PARAMETER when `index` is out of range.
 */
int FindChildByIndex(GumboNode *n, unsigned int index, GumboNode **child)
{
	int len;

	if (n == NULL || child == NULL)
		return GUMBO_ARGNULL;

	/* Keep the length signed so the error check is meaningful; the original
	 * stored it in an unsigned variable, which made "len < 0" dead code. */
	len = GetLen(n);
	if (len < 0 || index >= (unsigned int)len)
		return GUMBO_PARAMETER;

	*child = n->v.element.children.data[index];
	return 0;
}
/*
 * Find the first direct child of `n` whose tag equals `tag`.
 *
 * n      parent node; its element child vector is scanned in order.
 * tag    tag value to look for, in the range [0, GUMBO_TAG_LAST].
 * child  out: receives the matching child. It is written only when a match
 *        is found, so a failed lookup no longer leaves it pointing at an
 *        unrelated, non-matching node.
 *
 * Returns 0 on success, GUMBO_ARGNULL for a NULL pointer or an out-of-range
 * tag, and GUMBO_NOTFOUND when no child carries the tag.
 *
 * NOTE(review): GetTag() reads the element payload without checking the node
 * type, so non-element children are compared using whatever bytes occupy that
 * slot of the union - confirm whether a node-type filter is wanted here.
 */
int FindChildByTag(GumboNode *n, int tag, GumboNode **child)
{
	unsigned int len, i;

	if (n == NULL || child == NULL || tag < 0 || tag > GUMBO_TAG_LAST)
		return GUMBO_ARGNULL;

	len = (unsigned int)GetLen(n);

	for (i = 0; i < len; i++) {
		GumboNode *candidate = n->v.element.children.data[i];

		if (candidate != NULL && GetTag(candidate) == tag) {
			*child = candidate;
			return 0;
		}
	}
	return GUMBO_NOTFOUND;
}
/*
 * Look up the attribute named `id` on node `n` and hand back its value.
 *
 * n      node whose element attribute vector is searched.
 * id     attribute name passed to gumbo_get_attribute().
 * value  out: receives a pointer to the attribute's value string; the
 *        pointer is not copied, it refers to the attribute itself.
 *
 * Returns 0 on success, GUMBO_ARGNULL if any argument is NULL, and
 * GUMBO_NOTFOUND if the node has no such attribute.
 */
int GetAttrByID(GumboNode *n, char *id, const char **value)
{
	GumboAttribute *found;

	if (n == NULL || id == NULL || value == NULL)
		return GUMBO_ARGNULL;

	found = gumbo_get_attribute(&n->v.element.attributes, id);
	if (found != NULL) {
		*value = found->value;
		return 0;
	}
	return GUMBO_NOTFOUND;
}
/*
 * Return the attribute named `id` of node `n`.
 *
 * Yields NULL when either argument is NULL; otherwise returns whatever
 * gumbo_get_attribute() reports for the node's element attribute vector.
 */
GumboAttribute* GetAttr(GumboNode *n, char *id)
{
	if (n == NULL || id == NULL)
		return NULL;

	return gumbo_get_attribute(&n->v.element.attributes, id);
}
/*
 * Find the first direct child of `n` that has tag `tag` and an attribute
 * `id` whose value is exactly `value`.
 *
 * n      parent node; its element child vector is scanned in order.
 * tag    tag value to look for, in the range [0, GUMBO_TAG_LAST].
 * id     attribute name.
 * value  attribute value to match (compared with strcmp).
 * child  out: receives the matching child; written only on success.
 *
 * Returns 0 on success, GUMBO_ARGNULL for a NULL pointer or an out-of-range
 * tag, and GUMBO_NOTFOUND when no child satisfies both conditions.
 */
int FindChildByTagAndAttr(GumboNode *n, int tag, char *id, char *value, GumboNode **child)
{
	unsigned int len, i;

	if (n == NULL || child == NULL || tag < 0 || tag > GUMBO_TAG_LAST)
		return GUMBO_ARGNULL;
	if (id == NULL || value == NULL)
		return GUMBO_ARGNULL;

	len = (unsigned int)GetLen(n);

	for (i = 0; i < len; i++) {
		GumboNode *candidate = n->v.element.children.data[i];
		const char *attrvalue = NULL;

		if (candidate == NULL || GetTag(candidate) != tag)
			continue;

		/* A tag-matching child without the attribute is simply not a match.
		 * The original returned here, which hid every later sibling that
		 * did carry the attribute. */
		if (GetAttrByID(candidate, id, &attrvalue) != 0 || attrvalue == NULL)
			continue;

		if (strcmp(attrvalue, value) == 0) {
			*child = candidate;
			return 0;
		}
	}
	return GUMBO_NOTFOUND;
}
/*
 * Return the `type` field of node `n`.
 *
 * Returns the node type (>= 0) on success, or GUMBO_ARGNULL (< 0) when `n`
 * is NULL.
 */
int GetNodeType(GumboNode *n)
{
	return (n != NULL) ? (int)n->type : GUMBO_ARGNULL;
}
/* True for the whitespace characters trimmed from attribute values. */
static int CheckAttrIsBlank(char c)
{
	return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * Check that node `root` satisfies every attribute constraint of `path`.
 *
 * The constraints are the (attrname[i], attrvalue[i]) pairs of `path`, read
 * until the first empty attribute name. For each pair the node must carry the
 * attribute, and its value - with leading and trailing blanks, tabs, CRs and
 * LFs removed - must equal the expected value exactly.
 *
 * Returns 0 when all constraints hold (or there are none), GUMBO_PARAMETER
 * when an argument is NULL, the GetAttrByID() error when an attribute is
 * missing, and GUMBO_ATTRERROR when a value is empty after trimming or
 * differs from the expected one.
 *
 * Fixes relative to the previous implementation:
 *  - every constraint is checked; previously both branches of the comparison
 *    returned, so only the first pair was ever looked at;
 *  - empty / all-whitespace values no longer move the trim pointer in front
 *    of the string;
 *  - an expected value that merely starts with the actual value (e.g.
 *    expected "foobar", actual "foo") is no longer accepted.
 */
int CheckAttr(GumboNode *root, HtmlPathNode *path)
{
	int i;

	if (root == NULL || path == NULL)
		return GUMBO_PARAMETER;

	for (i = 0; strlen(path->attrname[i]) != 0; i++) {
		const char *expected = path->attrvalue[i];
		const char *actual = NULL;
		const char *start;
		const char *end;
		size_t trimmed;
		int ret;

		ret = GetAttrByID(root, path->attrname[i], &actual);
		if (ret)
			return ret;
		if (actual == NULL)
			return GUMBO_ATTRERROR;

		/* Trim on both sides without ever stepping outside the string. */
		start = actual;
		while (CheckAttrIsBlank(*start))
			start++;
		end = start + strlen(start);
		while (end > start && CheckAttrIsBlank(end[-1]))
			end--;

		trimmed = (size_t)(end - start);
		if (trimmed == 0)
			return GUMBO_ATTRERROR;

		/* strncmp() == 0 proves `expected` has at least `trimmed` characters,
		 * so reading expected[trimmed] is in bounds; it must be the
		 * terminator for the two values to have the same length. */
		if (strncmp(start, expected, trimmed) != 0 || expected[trimmed] != '\0')
			return GUMBO_ATTRERROR;
	}
	return 0;
}
/*
 * Exact path lookup: resolve the path step `path` among the direct children
 * of `root` and commit to the first child that passes every check. For
 * example, if an element has two span children and the path only specifies
 * TAG=span, only the first span that satisfies the conditions is recursed
 * into; its result is returned as-is, without trying later siblings.
 *
 * root  node whose children are examined.
 * path  current path step; path + 1 is the next step, and an entry equal to
 *       ENDNODE marks the end of the path.
 * node  out: receives the node matching the last path step on success.
 *
 * Returns 0 on success and GUMBO_NOTFOUND otherwise (including NULL
 * arguments).
 *
 * NOTE(review): the recursive step calls WalkHtmlPath, not WalkHtmlPathExact,
 * so levels below this one are resolved by WalkHtmlPath - confirm this is
 * intended.
 */
int WalkHtmlPathExact(GumboNode *root, HtmlPathNode *path, GumboNode **node)
{
#if DEBUG
printf("=================================================\n");
#endif
	HtmlPathNode *next = NULL;
	unsigned int len, i;
	int validnode = 0;	/* number of children that passed the tag check so far */
	int factindex = 0;	/* assigned below but never read */
	int ret = 0;

	if (root == NULL || path == NULL || node == NULL)
		return GUMBO_NOTFOUND;
#if DEBUG 
	/* Dump the constraints of the current path step. */
	if (path->tag != UNDEFINED)
		printf("path->tag[%d],tagname[%s]\n", path->tag, gumbo_normalized_tagname(path->tag));
	if (path->index != UNDEFINED)
		printf("path->index[%d]\n", path->index); 
	i = 0;
	/* The attribute list ends at the first empty attribute name. */
	while(1) {
		if (strlen(path->attrname[i]) == 0 || strcmp(path->attrname[i], "") == 0) {
			break;
		}
		printf("name[%s]-value[%s]\n", path->attrname[i], path->attrvalue[i]);	
		i++;
	}
#endif
	next = path + 1;

	/* An undefined index means "the first one". */
	if (path->index == UNDEFINED)
		factindex = 1;

	len = GetLen(root);
	for (i = 0; i < len; i++) {
		GumboNode *child = NULL;
		ret = FindChildByIndex(root, i, &child);
		if (ret) {
			return GUMBO_NOTFOUND;
		}
		/* If the path step's type is text (usually the last step), the tag
		 * range is not checked. Otherwise children whose tag value is <= 0 or
		 * >= GUMBO_TAG_LAST are skipped.
		 * NOTE(review): "<= 0" also skips tag value 0 - confirm intended. */

		if (	( path->type != GUMBO_NODE_TEXT &&
				  path->type != GUMBO_NODE_WHITESPACE &&
				  path->type != GUMBO_NODE_CDATA && 
				  path->type != GUMBO_NODE_DOCUMENT 
				) 
				&& 
				( GetTag(child) <= 0 || 
				 GetTag(child) >= GUMBO_TAG_LAST
				)
		) 
		{
			continue;
		}
#if DEBUG                                                                                 
	/* Dump the child being examined: tag and attributes for elements, the
	 * text for text nodes. */
	printf("++++child[%d]start+++\n", i);
	if (child->type == GUMBO_NODE_ELEMENT) {
		printf("child[%d]->tag[%d]-[%s]\n", i, GetTag(child),gumbo_normalized_tagname(GetTag(child)));
		GumboVector *vec = NULL;
		vec = &child->v.element.attributes;
		if (vec) {
			int attrlen = vec->length;
			printf("attr cnt[%d]\n", attrlen);
			int j = 0;
			for (j = 0; j < attrlen; j++) {
				const char *aa = NULL;
				GumboAttribute *attr = (GumboAttribute*)vec->data[j];
				printf("Attrname[%s]-Attrvalue[%s]\n", attr->name, attr->value);
			}
		}
	}
	else if (child->type == GUMBO_NODE_TEXT) {
		printf("TEXT[%s]\n", child->v.text.text);
	}
	printf("----child[%d]end----\n", i);

#endif  

		/* Check the tag: a child counts as a candidate when the step has no
		 * tag constraint or the tags are equal. */
		int temptag = GetTag(child);
		if (path->tag != UNDEFINED) {
			if (temptag != path->tag) 
				continue;
			else
				validnode++;
		}
		else 
			validnode++;

		/* Check the index: compared against the running candidate count,
		 * not against the raw child position. */
		if (path->index == UNDEFINED || validnode == path->index) {
			/* Check the node type; a mismatch aborts the whole lookup. */
			if (path->type != UNDEFINED) {
				if (GetNodeType(child) != path->type) {
					return GUMBO_NOTFOUND;	
				}
			}
			/* Check the attributes. */
			if (CheckAttr(child, path)) {
				/* Attribute mismatch: move on to the next child (this is
				 * what allows a later sibling to match when index is
				 * UNDEFINED). */
				continue;
			}
			/* All checks passed. */
			/* Last path step: this child is the result. */
			if (memcmp(next, &ENDNODE, sizeof(HtmlPathNode)) == 0) {
				*node = child;
				return 0;
			}
			else {
				/* Commit to this child: return the result of resolving the
				 * rest of the path beneath it, success or not. */
				return WalkHtmlPath(child, next, node); 	
			}
		}
	}

	return GUMBO_NOTFOUND;
}
/* Debug trace: print the tag, index and attribute constraints of one path step. */
static void WalkDebugPrintPath(HtmlPathNode *path)
{
	int k;

	printf("path 信息+++++++++\n");
	if (path->tag != UNDEFINED)
		printf("path->tag[%d] tagname[%s] ", path->tag, gumbo_normalized_tagname(path->tag));
	if (path->index != UNDEFINED)
		printf("path->index[%d]", path->index);
	printf("\n");
	/* The attribute list ends at the first empty attribute name. */
	for (k = 0; strlen(path->attrname[k]) != 0; k++)
		printf("name[%s]-value[%s]\n", path->attrname[k], path->attrvalue[k]);
	printf("path 信息--------\n");
}

/* Debug trace: print the tag and attributes (element) or the text (text node) of one child. */
static void WalkDebugPrintChild(GumboNode *child, int index)
{
	printf("++++子标签[%d]start+++\n", index);
	if (child->type == GUMBO_NODE_ELEMENT) {
		GumboVector *vec = &child->v.element.attributes;
		int attrlen = vec->length;
		int j;

		printf("child[%d]->tag[%d]-[%s]\n", index, GetTag(child), gumbo_normalized_tagname(GetTag(child)));
		printf("attr cnt[%d]\n", attrlen);
		for (j = 0; j < attrlen; j++) {
			GumboAttribute *attr = (GumboAttribute*)vec->data[j];
			printf("Attrname[%s]-Attrvalue[%s]\n", attr->name, attr->value);
		}
	}
	else if (child->type == GUMBO_NODE_TEXT) {
		printf("TEXT[%s]\n", child->v.text.text);
	}
	printf("----子标签[%d]end----\n", index);
}

/*
 * Resolve the path step `path` among the direct children of `root`, then
 * recurse into the matching child for the remaining steps. Unlike
 * WalkHtmlPathExact, a child that matches this step but whose subtree fails
 * to resolve the rest of the path is abandoned and the next sibling is tried.
 *
 * root  node whose children are examined.
 * path  current path step; path + 1 is the next step, and an entry equal to
 *       ENDNODE marks the end of the path.
 * node  out: receives the node matching the last path step on success.
 *
 * Returns 0 on success and GUMBO_NOTFOUND otherwise (including NULL
 * arguments). When htmldebug is non-zero a trace is printed to stdout.
 *
 * Matching behaviour is unchanged from the previous implementation; only
 * unused locals, an always-true pointer test and an unreachable debug print
 * were removed, and the debug dumps were moved into helpers.
 *
 * NOTE(review): the tag filter uses "<= 0", which also skips tag value 0, and
 * GetTag() does not look at the node type - confirm both are intended.
 */
int WalkHtmlPath(GumboNode *root, HtmlPathNode *path, GumboNode **node)
{
	HtmlPathNode *next;
	unsigned int len, i;
	int validnode = 0;	/* children that passed the tag check so far */
	int tagfilter;

	if (htmldebug)
		printf("=================================================\n");

	if (root == NULL || path == NULL || node == NULL)
		return GUMBO_NOTFOUND;

	if (htmldebug)
		WalkDebugPrintPath(path);

	next = path + 1;

	/* The tag-range filter is skipped when the step asks for a text,
	 * whitespace, CDATA or document node. */
	tagfilter = path->type != GUMBO_NODE_TEXT &&
		    path->type != GUMBO_NODE_WHITESPACE &&
		    path->type != GUMBO_NODE_CDATA &&
		    path->type != GUMBO_NODE_DOCUMENT;

	len = (unsigned int)GetLen(root);
	for (i = 0; i < len; i++) {
		GumboNode *child = NULL;
		int childtag;

		if (FindChildByIndex(root, i, &child))
			return GUMBO_NOTFOUND;

		childtag = GetTag(child);
		if (tagfilter && (childtag <= 0 || childtag >= GUMBO_TAG_LAST))
			continue;

		if (htmldebug)
			WalkDebugPrintChild(child, (int)i);

		/* Check the tag; every child that gets past it is a candidate. */
		if (path->tag != UNDEFINED && childtag != path->tag)
			continue;
		validnode++;

		if (htmldebug)
			printf("validnode[%d]-index[%d]\n", validnode, path->index);

		/* Check the index against the candidate count, not the raw child
		 * position. Once the count has passed the index nothing later can
		 * match, so stop scanning. */
		if (path->index != UNDEFINED) {
			if (path->index < validnode)
				break;
			if (validnode != path->index)
				continue;
		}

		/* Check the node type; a mismatch aborts the whole lookup. */
		if (path->type != UNDEFINED && GetNodeType(child) != path->type)
			return GUMBO_NOTFOUND;
		if (htmldebug)
			printf("okk");

		/* Check the attributes; on mismatch try the next child. */
		if (CheckAttr(child, path))
			continue;
		if (htmldebug)
			printf("okk1");

		/* All checks passed. If this was the last step, the child is the
		 * result. */
		if (memcmp(next, &ENDNODE, sizeof(HtmlPathNode)) == 0) {
			*node = child;
			if (htmldebug)
				printf("okk3");
			return 0;
		}

		/* Otherwise resolve the rest of the path below this child; if that
		 * fails, fall through to the next sibling. */
		if (WalkHtmlPath(child, next, node) == 0)
			return 0;
	}

	return GUMBO_NOTFOUND;
}
/*
 * Resolve `path` from `root` with WalkHtmlPath and copy the text of the
 * resulting text node into `text`.
 *
 * root  node the path is resolved from.
 * path  path array handed to WalkHtmlPath.
 * text  out: destination buffer of `len` bytes.
 * len   size of `text` in bytes; must be > 0.
 *
 * At most len - 1 characters are copied and the result is always
 * NUL-terminated; longer text is truncated. (The previous strncpy/strcpy
 * pair left the buffer unterminated when the text was longer than `len` and
 * wrote one byte past the end when its length was exactly `len`.)
 *
 * Returns 0 on success, GUMBO_ARGNULL for a NULL pointer or non-positive
 * `len`, the WalkHtmlPath error when the path does not resolve, and
 * GUMBO_PARSEERROR when the resolved node is not a text node. `text` is left
 * untouched on failure.
 */
int HtmlGetTextByPath(GumboNode *root, HtmlPathNode *path, char *text, int len)
{
	GumboNode *node = NULL;
	const char *src;
	size_t count;
	int ret;

	if (root == NULL || path == NULL || text == NULL || len <= 0)
		return GUMBO_ARGNULL;

	ret = WalkHtmlPath(root, path, &node);
	if (ret) {
		return ret;
	}
	if (node == NULL || GetNodeType(node) != GUMBO_NODE_TEXT)
		return GUMBO_PARSEERROR;

	src = node->v.text.text;
	count = (src != NULL) ? strlen(src) : 0;
	if (count > (size_t)len - 1)
		count = (size_t)len - 1;
	if (count > 0)
		memcpy(text, src, count);
	text[count] = '\0';
	return 0;
}
/*
 * Resolve `path` from `root` with WalkHtmlPath and copy the value of
 * attribute `id` of the resulting element into `text`.
 *
 * root  node the path is resolved from.
 * path  path array handed to WalkHtmlPath.
 * id    name of the attribute to read.
 * text  out: destination buffer of `len` bytes.
 * len   size of `text` in bytes; must be > 0.
 *
 * At most len - 1 characters are copied and the result is always
 * NUL-terminated; longer values are truncated. (The previous strncpy/strcpy
 * pair left the buffer unterminated when the value was longer than `len` and
 * wrote one byte past the end when its length was exactly `len`.)
 *
 * Returns 0 on success, GUMBO_ARGNULL for a NULL pointer or non-positive
 * `len`, the WalkHtmlPath error when the path does not resolve,
 * GUMBO_PARSEERROR when the resolved node is not an element (only elements
 * carry attributes), and the GetAttrByID error when the attribute is missing.
 * `text` is left untouched on failure.
 */
int HtmlGetAttrValueByPath(GumboNode *root, HtmlPathNode *path, char *id, char *text, int len)
{
	GumboNode *node = NULL;
	const char *attrvalue = NULL;
	size_t count;
	int ret;

	if (root == NULL || path == NULL || id == NULL || text == NULL || len <= 0)
		return GUMBO_ARGNULL;

	ret = WalkHtmlPath(root, path, &node);
	if (ret) {
		return ret;
	}

	if (node == NULL || GetNodeType(node) != GUMBO_NODE_ELEMENT)
		return GUMBO_PARSEERROR;

	ret = GetAttrByID(node, id, &attrvalue);
	if (ret)
		return ret;

	count = (attrvalue != NULL) ? strlen(attrvalue) : 0;
	if (count > (size_t)len - 1)
		count = (size_t)len - 1;
	if (count > 0)
		memcpy(text, attrvalue, count);
	text[count] = '\0';
	return 0;
}

/*
 * Map a tag enum value to its normalized tag name.
 *
 * Values in [0, GUMBO_TAG_LAST) are passed to gumbo_normalized_tagname();
 * anything else yields the "INVALID" fallback string.
 */
const char *TagEnumToName(GumboTag d)
{
	if (d >= 0 && d < GUMBO_TAG_LAST) {
		return gumbo_normalized_tagname(d);
	}
	return invalid;
}

/*
 * Map a node type enum value to an upper-case name.
 *
 * Document, element, text, CDATA, comment and whitespace nodes map to
 * "DOCUMENT", "ELEMENT", "TEXT", "CDATA", "COMMENT" and "WHITESPACE". Values
 * below 0, above GUMBO_NODE_WHITESPACE, or without an entry in the table
 * yield the "INVALID" fallback string.
 */
const char *TypeEnumToName(GumboNodeType d)
{
	/* Indexed by enum value; slots without an initializer stay NULL. */
	static const char *const names[] = {
		[GUMBO_NODE_DOCUMENT]   = "DOCUMENT",
		[GUMBO_NODE_ELEMENT]    = "ELEMENT",
		[GUMBO_NODE_TEXT]       = "TEXT",
		[GUMBO_NODE_CDATA]      = "CDATA",
		[GUMBO_NODE_COMMENT]    = "COMMENT",
		[GUMBO_NODE_WHITESPACE] = "WHITESPACE",
	};
	const char *name;

	if (d < 0 || d > GUMBO_NODE_WHITESPACE)
		return invalid;

	name = names[d];
	return (name != NULL) ? name : invalid;
}
/*
 * Walk the element subtree rooted at `node` depth-first and, for every
 * element with tag `tag` that has an attribute named `id` whose value equals
 * `id`, pass the text of the element's first child to `func` when that child
 * is a text node.
 *
 * node  subtree root; NULL and non-element nodes are ignored.
 * tag   tag the element must have.
 * id    attribute name to look up; also the value the attribute must have.
 * func  callback receiving the text; may be NULL, in which case nothing is
 *       reported.
 *
 * NOTE(review): the attribute's value is compared with the attribute's own
 * name (`id`); confirm this is the intended filter.
 *
 * Fixes relative to the previous implementation: the first child is only
 * read when the element actually has children, NULL nodes / NULL `id` no
 * longer get dereferenced, and the loop index is unsigned like the vector
 * length it is compared with.
 */
void GetAllAttrByTag(GumboNode *node, GumboTag tag, char* id, void (*func)(const char*))
{
	GumboVector *children;
	GumboAttribute *attr;
	unsigned int i;

	if (node == NULL || id == NULL)
		return;

	/* Only element nodes have a tag, attributes and children to inspect. */
	if (node->type != GUMBO_NODE_ELEMENT)
		return;

	children = &node->v.element.children;

	if (node->v.element.tag == tag) {
		attr = gumbo_get_attribute(&node->v.element.attributes, id);
		if (attr != NULL && attr->value != NULL &&
		    strcmp(attr->value, id) == 0 && children->length > 0) {
			GumboNode *first = (GumboNode *)children->data[0];

			/* Hand the text to the caller's callback. */
			if (first != NULL && first->type == GUMBO_NODE_TEXT && func != NULL)
				func(first->v.text.text);
		}
	}

	/* Recurse into every child. */
	for (i = 0; i < children->length; i++) {
		GetAllAttrByTag(children->data[i], tag, id, func);
	}
}
